CD7 Quality control for brightfield-based segmentation¶

In [132]:
%load_ext autoreload
%autoreload 2
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
In [133]:
from scip_workflows.common import *
In [134]:
from scip_workflows.core import plot_gate_czi
In [135]:
import time
import pickle
import math
from scip.features import texture
import flowutils
from matplotlib.collections import PatchCollection

from sklearn.preprocessing import scale, robust_scale

from functools import partial

Load processed frame¶

In [136]:
try:
    features = snakemake.input[0]
    output_columns = snakemake.output.columns
    output_index = snakemake.output.index
except NameError:
    # data_dir = Path("/data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/results/scip/202203221745/")
    data_dir = Path("/home/maximl/scratch/data/vsc/datasets/cd7/800/scip/061020221736/")
    output_index = data_dir / "indices" / "index.npy"
    output_columns = data_dir / "indices" / "columns.npy"
    features = data_dir / "features.parquet"
    correction_images = data_dir / "correction_images.pickle"
In [137]:
df = pq.read_table(features).to_pandas()
df = df.set_index(["meta_panel", "meta_replicate", "meta_P", "meta_id"])
df = df.sort_index()
df.shape
Out[137]:
(41194, 1188)

Meta information¶

In [138]:
seaborn.countplot(data=df.reset_index(), y="meta_panel", hue="meta_replicate")
Out[138]:
<AxesSubplot:xlabel='count', ylabel='meta_panel'>
In [139]:
# for now only continue with objects from panel D
df = df.loc["D"]
df.shape
Out[139]:
(41194, 1188)

NaN values¶

In [140]:
# show all NaN columns
df.columns[df.isna().all(axis=0)]
Out[140]:
Index([], dtype='object')

Low variance features¶

In [197]:
v = df.filter(regex="feat").var()
In [198]:
v.isna().sum()
Out[198]:
0
In [199]:
low_var = df.filter(regex="feat").columns[df.filter(regex="feat").var() < 0.001]
In [200]:
low_var
Out[200]:
Index(['feat_euler_number_combined', 'feat_solidity_combined',
       'feat_moments_central-0-1_combined',
       'feat_moments_central-1-0_combined', 'feat_moments_hu-0_combined',
       'feat_moments_hu-1_combined', 'feat_moments_hu-2_combined',
       'feat_moments_hu-3_combined', 'feat_moments_hu-4_combined',
       'feat_moments_hu-5_combined',
       ...
       'feat_combined_glcm_std_homogeneity_5_PGC',
       'feat_combined_glcm_mean_energy_3_PGC',
       'feat_combined_glcm_mean_energy_5_PGC',
       'feat_combined_glcm_std_energy_3_PGC',
       'feat_combined_glcm_std_energy_5_PGC',
       'feat_combined_glcm_std_correlation_3_PGC',
       'feat_combined_glcm_mean_ASM_3_PGC',
       'feat_combined_glcm_mean_ASM_5_PGC', 'feat_combined_glcm_std_ASM_3_PGC',
       'feat_combined_glcm_std_ASM_5_PGC'],
      dtype='object', length=230)
In [201]:
df = df.drop(columns=low_var)

Correction images¶

In [ ]:
with open(correction_images, "rb") as fh:
    correction_images = pickle.load(fh)
In [ ]:
fig, axes = plt.subplots(len(correction_images), 7)
for row, (k, corr_rep) in zip(axes, correction_images.items()):
    row[0].set_title(k)
    for ax, c in zip(row, corr_rep):
        ax.imshow(c)
        ax.set_axis_off()

Well effects¶

In [141]:
df["meta_loc_r"] = df["meta_bbox_minr"] + (df["meta_bbox_maxr"] - df["meta_bbox_minr"]) / 2
df["meta_loc_c"] = df["meta_bbox_minc"] + (df["meta_bbox_maxc"] - df["meta_bbox_minc"]) / 2
In [142]:
border_margin = 30
w, h = 1144, 1144

def is_outside_border(r):
    if (r.meta_loc_r - border_margin < 0) or (r.meta_loc_r + border_margin > h):
        return False
    if (r.meta_loc_c - border_margin < 0) or (r.meta_loc_c + border_margin > w):
        return False
    return True

df["meta_out_border"] = df.apply(is_outside_border, axis="columns")
In [15]:
def draw_tile(data, x, y, *args, channel=0, **kwargs):
    ax = seaborn.scatterplot(data=data, x=x, y=y, **kwargs)
    p, rep = data["meta_P"].iloc[0], data["meta_replicate"].iloc[0]    
    im.set_scene(f"P{p}-D{rep}")
    ax.imshow(numpy.max(im.get_image_data("ZXY", C=channel), axis=0) / correction_images[f"D{rep}"][channel], origin="lower", cmap="viridis")
    ax.set_axis_off()
    print(f"{p}-{rep}", end=" ")
In [ ]:
# DAPI

im = AICSImage(df["meta_path"].iloc[0], reconstruct_mosaic=False)

grid = seaborn.FacetGrid(data=df.reset_index(), col="meta_P", row="meta_replicate", margin_titles=True)
grid.map_dataframe(draw_tile, y="meta_loc_r", x="meta_loc_c", hue="meta_out_border", s=6, edgecolors="none")
for ax in grid.axes.ravel():
    ax.set_axis_off()
In [ ]:
# CD45 - EGFP

im = AICSImage(df["meta_path"].iloc[0], reconstruct_mosaic=False)

grid = seaborn.FacetGrid(data=df.reset_index(), col="meta_P", row="meta_replicate", margin_titles=True)
grid.map_dataframe(draw_tile, y="meta_loc_r", x="meta_loc_c", channel=1, hue="meta_out_border", s=6, edgecolors="none")
for ax in grid.axes.ravel():
    ax.set_axis_off()
1-1 2-1 3-1 4-1 5-1 6-1 7-1 8-1 9-1 10-1 11-1 12-1 13-1 14-1 15-1 16-1 17-1 18-1 19-1 20-1 21-1 22-1 23-1 24-1 25-1 2-2 3-2 4-2 5-2 6-2 7-2 8-2 9-2 10-2 11-2 12-2 13-2 14-2 16-2 17-2 18-2 19-2 20-2 22-2 23-2 24-2 25-2 1-3 2-3 3-3 4-3 5-3 6-3 7-3 8-3 9-3 10-3 11-3 12-3 13-3 14-3 15-3 16-3 17-3 18-3 19-3 20-3 21-3 22-3 23-3 24-3 1-4 2-4 3-4 4-4 5-4 6-4 7-4 8-4 9-4 10-4 11-4 12-4 13-4 14-4 15-4 16-4 17-4 18-4 19-4 20-4 22-4 23-4 24-4 1-5 2-5 3-5 4-5 5-5 6-5 7-5 8-5 9-5 10-5 11-5 12-5 13-5 14-5 15-5 16-5 17-5 18-5 19-5 20-5 22-5 23-5 24-5 
In [ ]:
# siglec8 - RPe
im = AICSImage(df["meta_path"].iloc[0], reconstruct_mosaic=False)

grid = seaborn.FacetGrid(data=df.reset_index(), col="meta_P", row="meta_replicate", margin_titles=True)
grid.map_dataframe(draw_tile, y="meta_loc_r", x="meta_loc_c", channel=2, hue="meta_out_border", s=6, edgecolors="none")
for ax in grid.axes.ravel():
    ax.set_axis_off()
1-1 2-1 3-1 4-1 5-1 6-1 7-1 8-1 9-1 10-1 11-1 12-1 13-1 14-1 15-1 16-1 17-1 18-1 19-1 20-1 21-1 22-1 23-1 24-1 25-1 2-2 3-2 4-2 5-2 6-2 7-2 9-2 10-2 11-2 12-2 13-2 14-2 16-2 17-2 18-2 19-2 20-2 22-2 23-2 24-2 25-2 1-3 2-3 3-3 4-3 5-3 6-3 7-3 8-3 9-3 10-3 11-3 12-3 13-3 14-3 15-3 16-3 17-3 18-3 19-3 20-3 21-3 22-3 23-3 24-3 1-4 2-4 3-4 4-4 5-4 6-4 7-4 8-4 9-4 10-4 11-4 12-4 13-4 14-4 15-4 16-4 17-4 18-4 19-4 20-4 22-4 23-4 24-4 1-5 2-5 3-5 4-5 5-5 6-5 7-5 8-5 9-5 10-5 11-5 12-5 13-5 14-5 15-5 16-5 17-5 18-5 19-5 20-5 22-5 23-5 24-5 
In [ ]:
# CD15 - APC
im = AICSImage(df["meta_path"].iloc[0], reconstruct_mosaic=False)

grid = seaborn.FacetGrid(data=df.reset_index(), col="meta_P", row="meta_replicate", margin_titles=True)
grid.map_dataframe(draw_tile, y="meta_loc_r", x="meta_loc_c", channel=3, hue="meta_out_border", s=6, edgecolors="none")
for ax in grid.axes.ravel():
    ax.set_axis_off()
1-1 2-1 3-1 4-1 5-1 6-1 7-1 8-1 9-1 10-1 11-1 12-1 13-1 14-1 15-1 16-1 17-1 18-1 19-1 20-1 21-1 22-1 23-1 24-1 25-1 2-2 4-2 5-2 6-2 7-2 9-2 10-2 11-2 12-2 13-2 14-2 16-2 17-2 18-2 19-2 20-2 22-2 24-2 25-2 1-3 2-3 3-3 4-3 5-3 6-3 7-3 8-3 9-3 10-3 11-3 12-3 13-3 14-3 15-3 16-3 17-3 18-3 19-3 20-3 21-3 22-3 23-3 24-3 1-4 2-4 3-4 4-4 5-4 6-4 7-4 8-4 9-4 10-4 11-4 12-4 13-4 14-4 15-4 16-4 17-4 18-4 19-4 20-4 22-4 23-4 24-4 1-5 2-5 3-5 4-5 6-5 7-5 8-5 9-5 10-5 11-5 12-5 13-5 14-5 15-5 16-5 17-5 18-5 19-5 20-5 22-5 23-5 24-5 
In [143]:
df = df[df["meta_out_border"]]
In [150]:
df.shape
Out[150]:
(35924, 1191)

Detected regions¶

In [151]:
df = df[df["meta_regions_DAPI"] > 0]
df.shape
Out[151]:
(31984, 1191)
In [155]:
df = df[(df["meta_regions_PGC"] > 0) & (df["meta_regions_Bright"] > 0) & (df["meta_regions_Oblique"] > 0)]
df.shape
Out[155]:
(30603, 1191)

Detecting multiplets¶

In [156]:
def get_gate_czi(sel, df, maxn=200, sort=None, channels=[0]):
    df = df.loc[sel]
    
    if len(df) > maxn:
        df = df.sample(n=maxn)
        
    if sort is not None:
        df = df.sort_values(by=sort)
    
    out = []
    for path, gdf in df.groupby(["meta_path"]):
        ai = AICSImage(path, reconstruct_mosaic=False)
        for scene, gdf2 in gdf.groupby(["meta_scene"]):
            ai.set_scene(scene)
            for tile, gdf3 in gdf2.groupby(["meta_tile"]):
                print(tile, scene, path)
                for (idx, r) in gdf3.iterrows():
                    pixels = ai.get_image_data("CXY", Z=0, T=0, C=channels, M=tile)
                    minr, minc, maxr, maxc = int(r["meta_bbox_minr"]), int(r["meta_bbox_minc"]), int(r["meta_bbox_maxr"]), int(r["meta_bbox_maxc"])

                    out.append(pixels[:, minr:maxr, minc:maxc])
                    
    return out
In [157]:
aspect_ratio = df["feat_major_axis_length_combined"] / df["feat_minor_axis_length_combined"]
In [158]:
sel1 = aspect_ratio > 1.8
out1 = get_gate_czi(sel1, df, maxn=4, channels=[0,4])
0 P12-D1 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P12-D5 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P18-D4 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P8-D1 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
In [159]:
sel2 = aspect_ratio < 1.25
out2 = get_gate_czi(sel2, df, maxn=4, channels=[0,4])
0 P13-D1 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P20-D4 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P5-D2 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P7-D5 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
In [160]:
grid = seaborn.displot(data=aspect_ratio)
grid.ax.axvline(1.8, c="black")
grid.ax.set_xlabel("major axis / minor axis (brightfield)")

ax = grid.fig.add_axes([0.6, 0.8, 0.2, 0.2], zorder=1)
ax.set_axis_off()
ax.imshow(out1[0][0], origin="lower")
ax = grid.fig.add_axes([0.6, 0.55, 0.2, 0.2], zorder=1)
ax.set_axis_off()
ax.imshow(out1[1][0], origin="lower")
ax = grid.fig.add_axes([0.6, 0.3, 0.2, 0.2], zorder=1)
ax.set_axis_off()
ax.imshow(out1[2][0], origin="lower")

ax = grid.fig.add_axes([0.3, 0.8, 0.2, 0.2], zorder=1)
ax.set_axis_off()
ax.imshow(out2[0][0], origin="lower")
ax = grid.fig.add_axes([0.3, 0.55, 0.2, 0.2], zorder=1)
ax.set_axis_off()
ax.imshow(out2[1][0], origin="lower")
ax = grid.fig.add_axes([0.3, 0.3, 0.2, 0.2], zorder=1)
ax.set_axis_off()
ax.imshow(out2[2][0], origin="lower")

# plt.savefig(data_dir / "figures/qc_major_minor_bf.pdf", bbox_inches='tight', pad_inches=0)
Out[160]:
<matplotlib.image.AxesImage at 0x7f492b448b50>
In [161]:
sel1 = aspect_ratio > 1.5*df.feat_eccentricity_combined + .7
sel2 = df.feat_eccentricity_combined > 0.1
sel3 = df.feat_eccentricity_combined < 0.5
sel4 = aspect_ratio > 1.05

sel5 = df.feat_eccentricity_combined > 0.8
sel6 = aspect_ratio > 1.8

gate1 = get_gate_czi(sel1 & sel2 & sel3 & sel4, df, sort="feat_eccentricity_combined", maxn=3, channels=[4,0])
gate2 = get_gate_czi(sel5 | sel6, df, sort="feat_eccentricity_combined", maxn=3, channels=[4,0])
0 P1-D4 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P10-D1 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P6-D2 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P22-D5 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P4-D1 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P9-D3 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
In [162]:
fig, ax = plt.subplots(dpi=150)
seaborn.scatterplot(
    data=df, 
    x="feat_eccentricity_combined", 
    y=aspect_ratio, 
    hue=(sel1 & sel2 & sel3 & sel4) | sel5 | sel6, 
    s=5, edgecolors="none", ax=ax, legend=False)

for i, im in enumerate(gate1):
    tmp_ax = fig.add_axes([.2 + i*.05, .3 + i*.17, .15, .15], zorder=1)
    tmp_ax.imshow(im[0])
    tmp_ax.set_axis_off()
for i, im in enumerate(gate2):
    tmp_ax = fig.add_axes([.55 + i*.05, .35 + i*.17, .15, .15], zorder=1)
    tmp_ax.imshow(im[0])
    tmp_ax.set_axis_off()
    
ax.set_ylabel("Aspect ratio (major / minor axis length)")
ax.set_xlabel("Eccentricity")
seaborn.despine(fig)

# plt.savefig(data_dir / "figures/ecc_versus_aspect.png", bbox_inches='tight', pad_inches=0, dpi=200)
In [163]:
plot_gate_czi(sel1 & sel2 & sel3 & sel4, df, maxn=5, channels=[4,0])
0 P12-D4 0 P13-D4 0 P18-D2 0 P2-D1 0 P22-D3 
In [164]:
df = df[~((sel1 & sel2 & sel3 & sel4) | sel5 | sel6)]
df.shape
Out[164]:
(30045, 1191)
In [165]:
seaborn.scatterplot(data=df, x="feat_area_combined", y="feat_eccentricity_combined", s=1, alpha=0.5)
Out[165]:
<AxesSubplot:xlabel='feat_area_combined', ylabel='feat_eccentricity_combined'>
In [166]:
sel1 = df["feat_area_combined"] > 5000
sel2 = df["feat_eccentricity_combined"] > .7
plot_gate_czi(sel1 & sel2, df, maxn=10, channels=[4,0])
0 P1-D3 0 P10-D5 0 P13-D2 0 P17-D1 0 P18-D1 0 P2-D1 0 P23-D1 0 P6-D3 0 P7-D5 
In [167]:
df = df[~(sel1 & sel2)]
df.shape
Out[167]:
(30010, 1191)
In [168]:
aspect_ratio = df["feat_major_axis_length_DAPI"] / df["feat_minor_axis_length_DAPI"]
In [169]:
seaborn.displot(data=aspect_ratio)
Out[169]:
<seaborn.axisgrid.FacetGrid at 0x7f49207be640>
In [170]:
sel1 = aspect_ratio > 3
plot_gate_czi(sel1, df, maxn=10, channels=[4,0])
0 P1-D5 0 P14-D3 0 P15-D1 0 P19-D1 0 P21-D3 0 P4-D4 0 P6-D2 0 P9-D5 
In [171]:
df = df[~sel1]
df.shape
Out[171]:
(29967, 1191)
In [172]:
sel1 = aspect_ratio > 2
plot_gate_czi(sel1, df, maxn=10, channels=[4,0])
0 P10-D2 0 P11-D2 0 P13-D2 0 P14-D2 0 P14-D3 0 P15-D3 0 P16-D3 0 P24-D2 0 P3-D4 0 P9-D2 

Texture features¶

In [173]:
df["feat_glcm_mean_contrast_3_Bright"].plot.hist(bins=100)
Out[173]:
<AxesSubplot:ylabel='Frequency'>
In [105]:
sel1 = df["feat_glcm_mean_contrast_3_Bright"] > 9
plot_gate_czi(sel1, df, channel=4, sort="feat_glcm_mean_contrast_3_Bright")
0 P1-D3 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P10-D2 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P11-D4 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P13-D3 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P13-D5 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P14-D2 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P15-D1 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P15-D3 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P15-D4 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P20-D1 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P20-D2 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P20-D3 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P22-D2 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P23-D3 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P23-D5 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P24-D2 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P24-D3 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P3-D3 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P4-D1 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P6-D3 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P6-D4 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P6-D5 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P8-D5 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P9-D1 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P9-D3 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
In [60]:
sel1 = df["feat_glcm_mean_contrast_3_Bright"] < 2
plot_gate_czi(sel1, df, maxn=20, channel=4, sort="feat_glcm_mean_contrast_3_Bright")
0 P11-D3 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P13-D3 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P14-D1 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P15-D3 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P15-D4 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P15-D5 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P16-D4 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P17-D3 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P18-D3 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P19-D2 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P22-D2 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P23-D4 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P25-D1 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P3-D4 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P6-D5 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P9-D1 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P9-D5 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
In [106]:
df["feat_glcm_mean_homogeneity_3_DAPI"].hist(bins=50)
Out[106]:
<AxesSubplot:>
In [174]:
sel1 = df["feat_glcm_mean_homogeneity_3_DAPI"] < .3
plot_gate_czi(sel1, df, maxn=15, channels=[4,0])
0 P1-D3 0 P12-D2 0 P15-D5 0 P17-D2 0 P17-D4 0 P18-D2 0 P18-D4 0 P19-D1 0 P19-D5 0 P23-D1 0 P5-D4 0 P6-D1 0 P6-D3 0 P8-D5 
In [175]:
df = df[~sel1]
df.shape
Out[175]:
(29952, 1191)
In [109]:
sel1 = df["feat_glcm_mean_homogeneity_3_DAPI"] > .7
plot_gate_czi(sel1, df, maxn=15, channel=0)
0 P13-D2 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P16-D1 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P18-D1 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P19-D2 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P2-D2 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P2-D3 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P2-D4 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P20-D2 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P20-D3 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P22-D3 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P25-D1 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P4-D2 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P7-D3 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
0 P9-D2 /home/maximl/scratch/data/vsc/datasets/cd7/800/Experiment-800.czi
In [110]:
df["feat_glcm_mean_contrast_3_DAPI"].plot.hist(bins=100)
Out[110]:
<AxesSubplot:ylabel='Frequency'>
In [76]:
sel1 = df["feat_glcm_mean_contrast_3_DAPI"] > 10
plot_gate_czi(sel1, df, maxn=15, channel=0)
0 P1-D3 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P10-D2 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P11-D3 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P13-D1 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P19-D5 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P2-D2 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P22-D2 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P22-D4 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P23-D4 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P24-D3 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P24-D4 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P4-D5 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P8-D5 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P9-D5 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
In [47]:
sel1 = df["feat_glcm_mean_contrast_3_DAPI"] < 1.5
plot_gate_czi(sel1, df, maxn=15, channel=0)
0 P15-D1 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P18-D3 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P19-D4 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P2-D5 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P20-D2 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P20-D4 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P22-D1 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P22-D2 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P22-D5 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P23-D3 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P24-D3 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P3-D3 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P4-D3 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P7-D4 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi
0 P7-D5 /data/gent/vo/000/gvo00070/vsc42015/datasets/cd7/800/Experiment-800.czi

Aligning feature distributions¶

In [176]:
def map_names(a):
    return {
        "feat_sum_DAPI": "DAPI",
        "feat_sum_EGFP": "CD45",
        "feat_sum_RPe": "Siglec 8",
        "feat_sum_APC": "CD15"
    }[a]
In [177]:
melted_df = pandas.melt(df.reset_index(), id_vars=["meta_P", "meta_replicate"], value_vars=df.filter(regex="feat_sum_(DAPI|EGFP|RPe|APC)").columns)
melted_df.variable = melted_df.variable.apply(map_names)

grid = seaborn.FacetGrid(data=melted_df, col="meta_replicate", row="variable", sharey=False, aspect=1.5, margin_titles=True)
grid.map_dataframe(seaborn.stripplot, x="meta_P", y="value", size=1, alpha=0.5)

grid.set_axis_labels("Well image position", "Fluorescence intensity")
grid.set_titles(col_template="Replicate {col_name}", row_template="{row_name}")

grid.add_legend()

# plt.savefig(data_dir / "figures/qc_intensity_distribution_pre.pdf", bbox_inches='tight', pad_inches=0)
Out[177]:
<seaborn.axisgrid.FacetGrid at 0x7f4925f5b040>

DAPI¶

Below are the DAPI intensities. Image positions 4, 9, 10, 14 and 15 of replicate 3 and 20 of replicate 1 have clearly elevated signals. It can be seen on the overview of image at the top of this notebook that the overal image is a bit brighter. This should be fixable with a min max normalization.

In [178]:
grid = seaborn.FacetGrid(data=df.reset_index(), col="meta_replicate")
grid.map_dataframe(seaborn.stripplot, y="feat_sum_DAPI", x="meta_P", s=1, alpha=0.7)
Out[178]:
<seaborn.axisgrid.FacetGrid at 0x7f492be28790>

Here are the cell areas. Image position 6 of replicate 3 shows some problems. When looking at the segmentation positions in the overview image, we can see that the segmentation seems to have underestimated the size of the cells splitting some nuclei in two. It seems best to drop the data from this image position.

In [179]:
grid = seaborn.FacetGrid(data=df.reset_index(), col="meta_replicate")
grid.map_dataframe(seaborn.stripplot, y="feat_area_DAPI", x="meta_P", s=1, alpha=0.7)
Out[179]:
<seaborn.axisgrid.FacetGrid at 0x7f4930082220>
In [180]:
grid = seaborn.FacetGrid(
    data=df.groupby(["meta_replicate", "meta_P"])["feat_sum_DAPI"].transform(scale).reset_index(),
    col="meta_replicate"
)
grid.map_dataframe(seaborn.stripplot, y="feat_sum_DAPI", x="meta_P", s=1, alpha=0.7)
Out[180]:
<seaborn.axisgrid.FacetGrid at 0x7f4930450160>
In [181]:
scale_dapi = df.groupby(["meta_replicate", "meta_P"])["feat_sum_DAPI"].transform(scale)

APC¶

In [182]:
grid = seaborn.FacetGrid(data=df.reset_index(), col="meta_replicate")
grid.map_dataframe(seaborn.stripplot, y="feat_combined_sum_APC", x="meta_P", s=1, alpha=0.7)
Out[182]:
<seaborn.axisgrid.FacetGrid at 0x7f49c9d29730>
In [183]:
sel1 = df["feat_combined_sum_APC"] > 0.5e7
plot_gate_czi(sel1, df, channels=[4,3])
0 P1-D4 0 P10-D3 0 P14-D3 0 P14-D5 0 P15-D5 0 P20-D1 0 P25-D1 0 P5-D2 0 P6-D2 0 P7-D4 0 P7-D5 
In [184]:
sel1.sum()
Out[184]:
22
In [185]:
df = df[~sel1]
df.shape
Out[185]:
(29930, 1191)
In [186]:
asinh_apc = flowutils.transforms.asinh(df["feat_combined_sum_APC"], channel_indices=None, t=4e6, m=4, a=1)
grid = seaborn.FacetGrid(data=asinh_apc.reset_index(), col="meta_replicate")
grid.map_dataframe(seaborn.stripplot, y="feat_combined_sum_APC", x="meta_P", s=1, alpha=0.5)
Out[186]:
<seaborn.axisgrid.FacetGrid at 0x7f4924cd1190>
In [187]:
def asinh_scale(x, t):
    return scale(flowutils.transforms.asinh(x, channel_indices=None, t=t, m=4.5, a=1), with_std=False)
In [188]:
asinh_scale_apc = df.groupby(["meta_replicate", "meta_P"])["feat_combined_sum_APC"].transform(lambda x: asinh_scale(x, df["feat_combined_sum_APC"].max())).reset_index()
In [189]:
grid = seaborn.FacetGrid(
    data=asinh_scale_apc,
    col="meta_replicate"
)
grid.map_dataframe(seaborn.stripplot, y="feat_combined_sum_APC", x="meta_P", s=1, alpha=0.7)
Out[189]:
<seaborn.axisgrid.FacetGrid at 0x7f4926929130>
In [190]:
seaborn.displot(
    data=asinh_scale_apc,
    x="feat_combined_sum_APC", hue="meta_replicate"
)
Out[190]:
<seaborn.axisgrid.FacetGrid at 0x7f49a834bfa0>

RPe¶

In [191]:
grid = seaborn.FacetGrid(data=df.reset_index(), col="meta_replicate")
grid.map_dataframe(seaborn.stripplot, y="feat_combined_sum_RPe", x="meta_P", s=1, alpha=0.7)
Out[191]:
<seaborn.axisgrid.FacetGrid at 0x7f4933e160a0>
In [192]:
sel1 = df["feat_combined_sum_RPe"] > 0.3e7
plot_gate_czi(sel1, df, channels=[4,2])
0 P25-D1 
In [193]:
sel1.sum()
Out[193]:
3
In [194]:
df = df[~sel1]
df.shape
Out[194]:
(29927, 1191)
In [195]:
grid = seaborn.FacetGrid(data=df.reset_index(), col="meta_replicate")
grid.map_dataframe(seaborn.stripplot, y="feat_combined_sum_RPe", x="meta_P", s=1, alpha=0.7)
Out[195]:
<seaborn.axisgrid.FacetGrid at 0x7f4921a35280>
In [196]:
asinh_scale_rpe = df.groupby(["meta_replicate", "meta_P"])["feat_combined_sum_RPe"].transform(lambda x: asinh_scale(x, df["feat_combined_sum_RPe"].max())).reset_index()
In [197]:
grid = seaborn.FacetGrid(data=asinh_scale_rpe, col="meta_replicate")
grid.map_dataframe(seaborn.stripplot, y="feat_combined_sum_RPe", x="meta_P", s=1, alpha=0.7)
Out[197]:
<seaborn.axisgrid.FacetGrid at 0x7f492bd9a310>
In [198]:
seaborn.displot(
    data=asinh_scale_rpe,
    x="feat_combined_sum_RPe", hue="meta_replicate"
)
Out[198]:
<seaborn.axisgrid.FacetGrid at 0x7f49300b4fa0>

EGFP¶

In [199]:
grid = seaborn.FacetGrid(data=df.reset_index(), col="meta_replicate")
grid.map_dataframe(seaborn.stripplot, y="feat_combined_sum_EGFP", x="meta_P", s=1, alpha=0.7)
Out[199]:
<seaborn.axisgrid.FacetGrid at 0x7f49307c3910>
In [200]:
asinh_scale_egfp = df.groupby(["meta_replicate", "meta_P"])["feat_combined_sum_EGFP"].transform(lambda x: asinh_scale(x, df["feat_combined_sum_EGFP"].max())).reset_index()
In [201]:
grid = seaborn.FacetGrid(data=asinh_scale_egfp, col="meta_replicate")
grid.map_dataframe(seaborn.stripplot, y="feat_combined_sum_EGFP", x="meta_P", s=1, alpha=0.7)
Out[201]:
<seaborn.axisgrid.FacetGrid at 0x7f490453a6d0>

Export¶

In [202]:
df.shape
Out[202]:
(29927, 1191)
In [203]:
numpy.save(output_index, df.index)
numpy.save(output_columns, df.columns)
In [ ]: